# Dickstein, Ho, and Mark (2023)
# Create plots of spending and probability of choosing a gold plan across different lambda levels
# to show that moral hazard and risk aversion is higher for SG than IM

# Preliminaries
setwd("../../../../library")
source("PreliminariesCode.R")

data_location <- paste0(project_folder, "/data")
counterfactual_folder <- paste0(project_folder, "/analysis/counterfactuals")
build_location <- paste0(project_folder, "/analysis/tablesandfigures/build/identification")
release_location <- paste0(project_folder, "/analysis/tablesandfigures/release/identification")
dir.create(build_location, recursive = T)
dir.create(release_location, recursive = T)

find_names <- function(name_vec, start){
  unlist(lapply(strsplit(name_vec[which(type_param_vec %in% start)], "\\."), function(x) x[3]))
}

# Are you providing your own parameters?
load_params = F

# Are the parameters being created below or somewhere else?
create_param_mat <- T

# which Rating Areas?
ravec = 1:7

# which years?
yrvec = 2016

# What percent of the premium is covered by an employer?
nu = .65

# What is the behavioral share
beshr <- .01

# What alpha-psi cutoff to use: 
alphampsicutoff <- .05

# What ACG cutoff to use: 
acgHL_boundary <- 1.76

# what is the base markup?
mrkup_0 <- 0

# Which spec do you want to use?
spectouse <- "lowrisk_simplemh_censor0.2"

# ~ # ~ # V IF YOU ARE CREATING PARAM MAT HERE V # ~ # ~ #
if(create_param_mat == T){
  
  # ~ # ~ # ~ # ~ # ~ # ~ # ~ # ~ #
  # Load base parameter values    #
  # ~ # ~ # ~ # ~ # ~ # ~ # ~ # ~ #
  
  # Loading Individual Market Estimated Parameters:
  IndMarketEstimates_0 <- fread(paste0(project_folder, "/analysis/estimationcode/specs/", spectouse, "_main/output/solution.csv"))
  IndMarketEstimates_0 <- IndMarketEstimates_0[, -c(1,2)]
  IndMarketlabels_0 <- fread(paste0(project_folder, "/analysis/estimationcode/specs/", spectouse, "_main/output/covar_lens.csv"))
  IndMarSE_0 <- cumsum(IndMarketlabels_0$x)
  NamedBetaList_0 <- list(
    beta1 = as.numeric(IndMarketEstimates_0[1, 1:IndMarSE_0[1]]),
    beta2 = as.numeric(IndMarketEstimates_0[1, (1 + IndMarSE_0[1]):IndMarSE_0[2]]),
    beta3 = as.numeric(IndMarketEstimates_0[1, (1 + IndMarSE_0[2]):IndMarSE_0[3]]),
    beta0 = as.numeric(IndMarketEstimates_0[1, (1 + IndMarSE_0[3]):IndMarSE_0[4]])
  )
  names(NamedBetaList_0[[1]]) <- names(IndMarketEstimates_0)[1:IndMarSE_0[1]]
  names(NamedBetaList_0[[2]]) <- names(IndMarketEstimates_0)[(1 + IndMarSE_0[1]):IndMarSE_0[2]]
  names(NamedBetaList_0[[3]]) <- names(IndMarketEstimates_0)[(1 + IndMarSE_0[2]):IndMarSE_0[3]]
  names(NamedBetaList_0[[4]]) <- names(IndMarketEstimates_0)[(1 + IndMarSE_0[3]):IndMarSE_0[4]]
  
  # Loading Estimated Parameters for Switchers:
  SGMarketEstimates_0 <- fread(paste0(project_folder, "/analysis/estimationcode/specs/", spectouse, "_switcher/output/solution.csv"))
  SGMarketEstimates_0 <- SGMarketEstimates_0[, -c(1,2)]
  SGMarketlabels_0 <- fread(paste0(project_folder, "/analysis/estimationcode/specs/", spectouse, "_switcher/output/covar_lens.csv"))
  SGMarSE_0 <- cumsum(SGMarketlabels_0$x)
  SGNamedBetaList_0 <- list(
    beta1 = as.numeric(SGMarketEstimates_0[1, 1:SGMarSE_0[1]]),
    beta2 = as.numeric(SGMarketEstimates_0[1, (1 + SGMarSE_0[1]):SGMarSE_0[2]]),
    beta3 = as.numeric(SGMarketEstimates_0[1, (1 + SGMarSE_0[2]):SGMarSE_0[3]]),
    beta0 = as.numeric(SGMarketEstimates_0[1, (1 + SGMarSE_0[3]):SGMarSE_0[4]])
  )
  names(SGNamedBetaList_0[[1]]) <- names(SGMarketEstimates_0)[1:SGMarSE_0[1]]
  names(SGNamedBetaList_0[[2]]) <- names(SGMarketEstimates_0)[(1 + SGMarSE_0[1]):SGMarSE_0[2]]
  names(SGNamedBetaList_0[[3]]) <- names(SGMarketEstimates_0)[(1 + SGMarSE_0[2]):SGMarSE_0[3]]
  names(SGNamedBetaList_0[[4]]) <- names(SGMarketEstimates_0)[(1 + SGMarSE_0[3]):SGMarSE_0[4]]
  
  # Loading supply size parameters 
  SupplySideCoefficients_0 <-fread(paste0("~/sharedWork/oregon/Analysis_NDM/tablesandfigures/release/prem_setting/tables/pr_paperfull_best_guess_ra_s_wsimcost.csv"))
  SupplySideData_0 <- data.table(
    expand.grid(payer_id = c("M0013", "M0019", "M0035", "M0062", "M0077", "M0080"),
                year = c("2015", "2016"))
  )
  
  # Create beta_{5j}'A_j
  names(SupplySideCoefficients_0) <- c("var", "mod1", "mod2", "mod3", "mod4")
  SupplySideCoefficients_0[, year := ifelse(grepl("2015", var), "2015", NA), ]
  SupplySideCoefficients_0[, year := ifelse(grepl("2016", var), "2016", year), ]
  SupplySideCoefficients_0[, payer_id := ifelse(grepl("M0013", var), "M0013", NA), ]
  SupplySideCoefficients_0[, payer_id := ifelse(grepl("M0019", var), "M0019", payer_id), ]
  SupplySideCoefficients_0[, payer_id := ifelse(grepl("M0035", var), "M0035", payer_id), ]
  SupplySideCoefficients_0[, payer_id := ifelse(grepl("M0062", var), "M0062", payer_id), ]
  SupplySideCoefficients_0[, payer_id := ifelse(grepl("M0077", var), "M0077", payer_id), ]
  SupplySideCoefficients_0[, payer_id := ifelse(grepl("M0080", var), "M0080", payer_id), ]
  constant_0 <- as.numeric(SupplySideCoefficients_0[var == "Constant"]$mod3)
  
  SupplySideCoefficients2015_0 <- SupplySideCoefficients_0[year == "2015" | is.na(year)]
  SupplySideCoefficients2015_0 <- SupplySideCoefficients2015_0[ , .(
    fe_beta_5 = sum(as.numeric(mod3)) + constant_0,
    year = "2015"
  ), by = c("payer_id")]
  
  SupplySideCoefficients2016_0 <- SupplySideCoefficients_0[year == "2016" | is.na(year)]
  SupplySideCoefficients2016_0 <- SupplySideCoefficients2016_0[ , .(
    fe_beta_5 = sum(as.numeric(mod3)) + constant_0,
    year = "2016"
  ), by = c("payer_id")]
  
  SupplySideFEs_0 <- rbind(SupplySideCoefficients2015_0, SupplySideCoefficients2016_0)
    SupplySideData_0 <- merge(
    SupplySideData_0,
    SupplySideFEs_0,
    by = c("payer_id", "year"),
    all.x = T
  )
  
  # Create beta_4
  SupplySideData_0$beta_4 <- as.numeric(SupplySideCoefficients_0[var == "Medical costs"]$mod3)
  
}

# Run base parameters to get the ExplodedDatasets
load_base_data <- TRUE
NamedBetaList <- NamedBetaList_0
SGNamedBetaList <- SGNamedBetaList_0
SupplySideData <- SupplySideData_0
mrkup <- mrkup_0

source(paste0(counterfactual_folder, "/library/DA03bRunCounterfactualGetdataready.R")) 

#########################################
# 2. Create Moral Hazard Plots
#########################################

# Import allsubscribers dataset
allsubs <- fread(paste0(data_location, "/all_markets_subscribers.csv"))

# Collapse ExplodedData to unique subscriberid, year, alpha, omega
ExplodedData_SG_small <- select(ExplodedData_SG, subscriberid, year, alpha, omega, x_av, silver_sub)
ExplodedData_Ind_small <- select(ExplodedData_Ind, subscriberid, year, alpha, omega, x_av, silver_sub)

# Find the switchers 
switchers <- fread(paste0(data_location, "/explodeddata.csv"))
switchers_small <- select(switchers, subscriberid, year, switcher)
switchers_small_nodup <- switchers_small[!duplicated(switchers_small),]
ExplodedData_SG_switchers <- merge(ExplodedData_Ind, switchers_small_nodup, by=c("subscriberid", "year"))
ExplodedData_SG_switchers <- ExplodedData_SG_switchers[ExplodedData_SG_switchers$switcher == 1, ]

# Select relevant columns.
ExplodedData_SG_switchers <- select(ExplodedData_SG_switchers, subscriberid, year, x_av, silver_sub, cons, low_sum_risk, over50) 
allsubs_SG_merged_switchers <- merge(allsubs, ExplodedData_SG_switchers, by=c("subscriberid", "year"))

# Create new "metal" column
allsubs_SG_merged_switchers$x_tier <- allsubs_SG_merged_switchers$best_guess_metal
allsubs_SG_merged_switchers$x_tier[allsubs_SG_merged_switchers$best_guess_metal == "3" & allsubs_SG_merged_switchers$silver_sub == TRUE] <- "1"

# Merge onto allsubscribers dataset based on subscriberid, year
allsubs_SG_merged <- merge(allsubs, ExplodedData_SG_small, by=c("subscriberid", "year"))
allsubs_Ind_merged <- merge(allsubs, ExplodedData_Ind_small, by=c("subscriberid", "year"))

# Create new "metal" column that separates each x actuarial value
allsubs_SG_merged$x_tier <- allsubs_SG_merged$best_guess_metal
allsubs_SG_merged$x_tier[allsubs_SG_merged$best_guess_metal == "3" & allsubs_SG_merged$silver_sub == TRUE] <- "1"
allsubs_Ind_merged$x_tier <- allsubs_Ind_merged$best_guess_metal
allsubs_Ind_merged$x_tier[allsubs_Ind_merged$best_guess_metal == "3" & allsubs_Ind_merged$silver_sub == TRUE] <- "1"

# Change x_tier to numeric
allsubs_SG_merged_switchers$x_tier <- as.numeric(allsubs_SG_merged_switchers$x_tier)
allsubs_Ind_merged$x_tier <- as.numeric(allsubs_Ind_merged$x_tier)
allsubs_SG_merged$x_tier <- as.numeric(allsubs_SG_merged$x_tier)

# Redefine alpha, omega for the switchers (because the alpha currently comes from Ind formula, which isnt right)
allsubs_SG_merged_switchers$alpha <- exp(as.matrix(allsubs_SG_merged_switchers[, names(SGNamedBetaList$beta1), with = F]) %*% SGNamedBetaList$beta1)
allsubs_SG_merged_switchers$psi <- exp(as.matrix(allsubs_SG_merged_switchers[, names(SGNamedBetaList$beta3), with = F]) %*% SGNamedBetaList$beta3)

# Remove outliers. We only keep observations that satisfy alpha - psi > .05 in that particular group
allsubs_SG_merged_switchers <- allsubs_SG_merged_switchers[allsubs_SG_merged_switchers$alpha > (alphampsicutoff + .2667674), ]
allsubs_SG_merged <- allsubs_SG_merged[allsubs_SG_merged$alpha > 0.2719097, ]
allsubs_Ind_merged <- allsubs_Ind_merged[allsubs_Ind_merged$alpha > 0.1622922, ]

#########################################
print("Moral Hazard Plots: Switchers")
for(i in 2:length(unique(allsubs_SG_merged_switchers$x_tier))){
  print(paste0("We are on tier", i))
  
  # Subset to tier
  allsubs_SG_grouped <- allsubs_SG_merged_switchers[allsubs_SG_merged_switchers$x_tier == i]
  allsubs_Ind_grouped <- allsubs_Ind_merged[allsubs_Ind_merged$x_tier == i]
  
  # Bin households into groups based on alpha (each bin is a quintile)
  allsubs_SG_grouped <- allsubs_SG_grouped %>%
    mutate(alpha_quantileSG = ntile(alpha, 20))
  allsubs_Ind_grouped <- allsubs_Ind_grouped %>%
    mutate(alpha_quantileInd = ntile(alpha, 20))
  
  # Calculate observed spending for SG, IM
  allsubs_SG_grouped_spending <- allsubs_SG_grouped %>%
    group_by(alpha_quantileSG)  %>%
    summarize(expected_lambda = mean(1/alpha))
  allsubs_Ind_grouped_spending <- allsubs_Ind_grouped %>%
    group_by(alpha_quantileInd)  %>%
    summarize(expected_lambda = mean(1/alpha))
  
  # Merge observed spending back into original dataset
  allsubs_SG_grouped <- merge(allsubs_SG_grouped_spending, allsubs_SG_grouped, by=c("alpha_quantileSG"))
  allsubs_Ind_grouped <- merge(allsubs_Ind_grouped_spending, allsubs_Ind_grouped, by=c("alpha_quantileInd"))
  
  # Change into data tables
  setDT(allsubs_SG_grouped)
  setDT(allsubs_Ind_grouped)
  
  # Calculate spending for every quantile
  allsubs_SG_grouped_totspend <- allsubs_SG_grouped %>%
    group_by(alpha_quantileSG)  %>%
    summarize(median_spending = median(totpaidmonth_span))
  allsubs_Ind_grouped_totspend <- allsubs_Ind_grouped %>%
    group_by(alpha_quantileInd)  %>%
    summarize(median_spending = median(totpaidmonth_span))
  
  # Merge spending back into original dataset
  allsubs_SG_grouped <- merge(allsubs_SG_grouped_totspend, allsubs_SG_grouped, by=c("alpha_quantileSG"))
  allsubs_Ind_grouped <- merge(allsubs_Ind_grouped_totspend, allsubs_Ind_grouped, by=c("alpha_quantileInd"))
  
  # Only keep relevant columns
  allsubs_SG_grouped_small <- select(allsubs_SG_grouped, expected_lambda, median_spending)
  allsubs_Ind_grouped_small <- select(allsubs_Ind_grouped, expected_lambda, median_spending)
  allsubs_SG_grouped_small$expl_market <- "SG"
  allsubs_Ind_grouped_small$expl_market <- "Ind"
  
  # Combine SG and Ind small datasets
  allsubs_grouped_small <- rbind(allsubs_SG_grouped_small, allsubs_Ind_grouped_small)
  allsubs_grouped_small_nodup <- allsubs_grouped_small[!duplicated(allsubs_grouped_small), ]
  
  # Create subsets for SG, Ind again so that we can make best fit line on scatterplot
  allsubs_small_nodup_SG <- allsubs_grouped_small_nodup[allsubs_grouped_small_nodup$expl_market =="SG", ]
  allsubs_small_nodup_Ind <- allsubs_grouped_small_nodup[allsubs_grouped_small_nodup$expl_market == "Ind", ]
  
  png(filename=paste0(release_location, "/moral_hazard_plot_switchers_metal", i, ".png"))
  plot(allsubs_grouped_small_nodup$expected_lambda, allsubs_grouped_small_nodup$median_spending, col=c("red","blue")[as.factor(allsubs_grouped_small_nodup$expl_market)])
  legend(x = "topright", legend = levels(as.factor(allsubs_grouped_small_nodup$expl_market)), col=c("red","blue"), pch=1)
  abline(lm(allsubs_small_nodup_SG$median_spending ~ allsubs_small_nodup_SG$expected_lambda), col = "blue")
  abline(lm(allsubs_small_nodup_Ind$median_spending ~ allsubs_small_nodup_Ind$expected_lambda), col = "red")
  dev.off()
  
  # export as .dta file to make graphs in stata
  write.dta(allsubs_grouped_small_nodup, file = paste0(build_location, "/moralhazard_switchers_metal", i, ".dta"))
}

#########################################
# 3. Risk Aversion Plots
#########################################

print("Risk Aversion Plots: Switchers")

# Created expected medical spending variable
setDT(allsubs_SG_merged_switchers)
setDT(allsubs_Ind_merged)

# Bin households into groups based on alpha (and thus lambda) (each bin is a quintile)
allsubs_SG_merged_switchers <-  allsubs_SG_merged_switchers %>%
  mutate(alpha_quantileSG = ntile(alpha, 20))
allsubs_Ind_merged <- allsubs_Ind_merged %>%
  mutate(alpha_quantileInd = ntile(alpha, 20))

# Find central lambda corresponding to each alpha_quantile
allsubs_SG_grouped_spending <- allsubs_SG_merged_switchers %>%
  group_by(alpha_quantileSG)  %>%
  summarize(expected_lambda = mean(1/alpha))
allsubs_Ind_grouped_spending <- allsubs_Ind_merged %>%
  group_by(alpha_quantileInd)  %>%
  summarize(expected_lambda = mean(1/alpha))

# Merge expected lambda back into original dataset
allsubs_SG_merged_switchers <- merge(allsubs_SG_grouped_spending, allsubs_SG_merged_switchers, by=c("alpha_quantileSG"))
allsubs_Ind_merged <- merge(allsubs_Ind_grouped_spending, allsubs_Ind_merged, by=c("alpha_quantileInd"))

# Create probability choose gold plan variable
setDT(allsubs_SG_merged_switchers)
setDT(allsubs_Ind_merged)
allsubs_SG_merged_switchers[, gold := ifelse(x_tier == 4, 1, 0)]
allsubs_Ind_merged[, gold := ifelse(x_tier == 4, 1, 0)]
allsubs_SG_grouped_gold <- allsubs_SG_merged_switchers %>%
  group_by(alpha_quantileSG)  %>%
  summarize(prob_gold = mean(gold))
allsubs_Ind_grouped_gold <- allsubs_Ind_merged %>%
  group_by(alpha_quantileInd)  %>%
  summarize(prob_gold = mean(gold))
allsubs_SG_merged_switchers <- merge(allsubs_SG_grouped_gold, allsubs_SG_merged_switchers, by=c("alpha_quantileSG"))
allsubs_Ind_merged <- merge(allsubs_Ind_grouped_gold, allsubs_Ind_merged, by=c("alpha_quantileInd"))

# Only keep relevant columns
allsubs_SG_merged_small <- select(allsubs_SG_merged_switchers, expected_lambda, prob_gold)
allsubs_Ind_merged_small <- select(allsubs_Ind_merged, expected_lambda, prob_gold)

# Add SG, Ind identifier
allsubs_SG_merged_small$expl_market <- "SG"
allsubs_Ind_merged_small$expl_market <- "Ind"

# Combine SG and Ind small datasets
allsubs_merged_small <- rbind(allsubs_SG_merged_small, allsubs_Ind_merged_small)
allsubs_merged_small_nodup <- allsubs_merged_small[!duplicated(allsubs_merged_small), ]

# create subsets for SG, Ind again so that we can make best fit line on scatterplot
allsubs_small_nodup_SG <- allsubs_merged_small_nodup[allsubs_merged_small_nodup$expl_market =="SG", ]
allsubs_small_nodup_Ind <- allsubs_merged_small_nodup[allsubs_merged_small_nodup$expl_market == "Ind", ]

png(filename=paste0(release_location, "/risk_aversion_plot_gold_switchers.png"))
plot(allsubs_merged_small_nodup$expected_lambda, allsubs_merged_small_nodup$prob_gold, col=c("red","blue")[as.factor(allsubs_merged_small_nodup$expl_market)])
legend(x = "topright", legend = levels(as.factor(allsubs_merged_small_nodup$expl_market)), col=c("red","blue"), pch=1)
abline(lm(allsubs_small_nodup_SG$prob_gold ~ allsubs_small_nodup_SG$expected_lambda), col = "blue")
abline(lm(allsubs_small_nodup_Ind$prob_gold ~ allsubs_small_nodup_Ind$expected_lambda), col = "red")
dev.off()

# We export risk aversion dataset as .dta file to make graph in STATA
write.dta(allsubs_merged_small_nodup, file = paste0(build_location, "/riskaversion_switchers.dta"))
